/*==================================================
project:       Targeting - Baseline PMT with SE
Author:        David L. Vargas 
E-email:       davidvar@iadb.org
url:           
Dependencies:  
----------------------------------------------------
Creation Date:    
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/


 /*==================================================
               0: Program set up
 ==================================================*/
 
// Point estimates FUN 

cap program drop baseline_pmt 
program define baseline_pmt_co, rclass
    syntax [anything(name=subcmd id="subcommand")],  ///
    [           predvars(string)                      ///
				pmtvars(string)                      ///
				negonly(string)						///
                reps(string)						///
				FIXTrans(string)                   ///
				base(string)                       ///
                cutoff(string)                     ///
                clear                              ///
                pause                              ///
    ] 


    //========================== Preparation ========================== //
    cap frame drop estts 
    cap frame drop results 
	cap frame drop estts_avg
    scalar epovline = 137350
    scalar l_epovline = log(epovline)

    // if cutoff not provided use the l_epovline
    if "`cutoff'" == ""         loc cutoff = l_epovline

    *----------- 1.3 Define budget
    scalar prog_budget_colombia = 1997181822314 / 12  // Transfer expenses from (annual)
    scalar nfamilies_colombia = 50372424 / 2.9 // inhabitans by average family size
    scalar	ph_budget = prog_budget_colombia / nfamilies_colombia
	
**#    Bootstrap
    qui {
    frame create estts 
**#  Check if this is the right way of fixing the seed.
	sort id_vivienda_SISBEN
    set seed `=339487731'

    noi _dots 0, title(Loop running) reps(`reps') // use dot's intead of a message to go faster
        
    forv x = 1/`reps' {
		preserve 

        drop if missing(l_pp_inc_srv)

		sort id_vivienda_SISBEN
        
		set seed `=339487731+`x'-1'
        
        // An uniform have 50% probably of being > 0.5, let's use that for the split
        tempvar _sample_srv
        tempvar _random 
        tempvar _aux
        gen  `_random' = runiform() if year == 2019
        bys id_hogar_SISBEN : egen `_aux' = max(`_random')
        replace `_random' = `_aux'
        gen `_sample_srv' = 1
        replace `_sample_srv' = 2 if `_random' > 0.5
        lab var `_sample_srv' "Training or testing sample"
        lab define lsmaple 1 "Training" 2 "Testing", replace
        lab values `_sample_srv' lsmaple
        drop `_random' `_aux'
		

        // LASSO 
        if ("`subcmd'" == "lasso") {
            *lasso linear `incvar'  (`allwaysvars') `variables' [iw=pondera] if year == 2020 & `_sample_srv' == 1 & !missing(l_pp_inc_srv), nocons 
            lasso linear `pmtvars' [iw=pondera] if year == 2019 & `_sample_srv' == 1 
        } 
        else {
            // OLS 
            reg `pmtvars' [aw=pondera] if `_sample_srv'==1 & year==2019
        }
		
		foreach var in `predvars' {
			tempvar aux
			gen  `_aux'=`var' if year==2019
			tempvar maux
			egen `maux'=mean(`_aux'), by(id_hogar_SISBEN)
			replace `var'=`maux'
			drop `maux' `_aux'
		}
		
		xtset id_hogar_SISBEN year
		foreach w in `negonly' {
		if "`w'"!="no" {	
		tempvar lag	
		g `lag'=d.`w'
		replace `w'=l.`w' if `lag'>0 & year>2019
		drop `lag'
		}
		else {
			
		}
		}

		tempvar l_pred_inc_srv_shock
        if ("`subcmd'" == "lasso") {
            predict `l_pred_inc_srv_shock' if `_sample_srv' == 2 
        }
        else {
            predict `l_pred_inc_srv_shock' if `_sample_srv' == 2  , xb
        }
		
        /*==================================================
            2: Inclusion and exclusion change
        ==================================================*/

        *--------- 2.1. Poverty prediction 
        
        tempvar targeted
        tempvar covered0

        // Targets
        gen `targeted' = l_pp_inc_srv <= l_epovline 
        replace `targeted' = . if l_pp_inc_srv == .
		tab `targeted'

        // Coverage 
        *_pctile `l_pred_inc_srv_shock' if year == 2019 [aw=pondera], n(100)
        xtile aux1 = `l_pred_inc_srv_shock' [aw=pondera] if year == 2019, n(100)
        bys id_hogar_SISBEN: egen pctile_prd = max(aux1)
        drop aux1 
        *gen `covered0' = `l_pred_inc_srv_shock' <= r(r`cutoff') // POVERTY LINE 
        gen `covered0' = pctile_prd <= `cutoff' 
        replace `covered0'  = . if missing(`l_pred_inc_srv_shock')  
        tab `covered0'
        *--------- 2.1. inclusion and exclusion error
        tempvar inclusion_err0 inclusion_ok0 exclusion_err0 exclusion_ok0
        gen `inclusion_err0' = (`covered0' == 1) if !missing(`l_pred_inc_srv_shock') & `targeted' == 0 
        gen `inclusion_ok0' =  (`covered0' == 1) if !missing(`l_pred_inc_srv_shock') & `targeted' == 1
        gen `exclusion_err0' = (`covered0' == 0) if !missing(`l_pred_inc_srv_shock') & `targeted' == 1
        gen `exclusion_ok0' =  (`covered0' == 0) if !missing(`l_pred_inc_srv_shock') & `targeted' == 0
        
        loc k =0
        forv y = 2019/2021 {
            loc ++k
            mat estt = J(1,13,.)
            mat estt[1,1] = `y'

            loc j = 0
            foreach v in `covered0' `inclusion_err0' `inclusion_ok0' `exclusion_err0' `exclusion_ok0' {
                loc ++j
                sum `v' [aw = pondera] if year == `y' & !missing(`l_pred_inc_srv_shock') &  `_sample_srv' == 2
                scalar `v'_`y' = r(mean)
                mat estt[1,`=`j'+1'] = `v'_`y' 
            }

            // ------- CRRA 
			keep if `_sample_srv'==2


           * Define base
            sum `covered0' /*[aw=pondera]*/ if !missing(`l_pred_inc_srv_shock') & year == `y'  
            loc p_cov = r(mean)

            count if !missing(`l_pred_inc_srv_shock') & year == `y'  
            scalar base = r(N) * `p_cov' 
            *noi di as result base 
            
            *----------- 2.3 transfer size and budget 
            
            count if `l_pred_inc_srv_shock' != . & year == `y'  
            scalar ssize = r(N)
            if ("`fixtrans'" != "")  {
                tempvar hh_benefit
                gen `hh_benefit' = `fixtrans'  if year == `y'
                replace `hh_benefit' = `hh_benefit' * `covered0'  if year == `y'

                * store value
                scalar budget = `fixtrans' * base                  // Budget within usable sample 
                scalar budget_nat = (nfamilies_colombia * `p_cov') * `fixtrans' // National budget equivalency
                sca hh_benefits = `fixtrans'

            }
            else {
                * update budget 
                scalar budget = ph_budget * ssize                  // Budget within usable sample 
                scalar budget_nat = nfamilies_colombia * ph_budget // National budget equivalency

                tempvar hh_benefit
                gen `hh_benefit' = budget / base	if year == `y' 
                replace `hh_benefit' = 0 if base == 0 & year == `y' 
                replace `hh_benefit' = `hh_benefit' * `covered0' if year == `y'
                
                * store value
                sca hh_benefits = budget / base	
                if base == 0    sca hh_benefits = 0 
            }
           
                         
            tempvar pp_benefits_c
            gen `pp_benefits_c' = `hh_benefit' / n_members_srv if year == `y'
            replace  `pp_benefits_c' = 0 if missing(n_members_srv)
            replace `pp_benefits_c' = `pp_benefits_c' * `covered0' if year == `y'

            

            * total HH income
            tempvar income_hh
            *gen `income_hh' = `base'+income_pp_nt_win + `pp_benefits_c' if !missing(`l_pred_inc_srv_shock') & year == `y'  
            gen `income_hh' = `base'+income_pp_nt + `pp_benefits_c' if !missing(`l_pred_inc_srv_shock') & year == `y'  
            *----------- 2.3 Estiamtion CRRA by years
			scalar rho=1.5
            tempvar crra_l 
            gen `crra_l' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') 
	        sum `crra_l' [aw = pondera] if year == `y'
            sca totalU_`y'_l = r(mean)
			
			scalar rho=3
            tempvar crra 
            gen `crra' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') 
	        sum `crra' [aw = pondera] if year == `y'
            sca totalU_`y' = r(mean)
			
			scalar rho=4.5
            tempvar crra_u 
            gen `crra_u' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') 
	        sum `crra_u' [aw = pondera] if year == `y'
            sca totalU_`y'_u = r(mean)
            *noi di r(sum)
            
            // same but with my crra FUN
            *welfareho , incvar(income_pp_nt_win) benvar(`pp_benefits_c') w(pondera) p(rho) base(`base')
            *scalar ud_`y' = r(CRRA)

            // Share of hhs with 0 income after transfers. 
			
			tempvar zero_post
			gen `zero_post'=(`income_hh'<=`base') if !missing(`l_pred_inc_srv_shock')  
			sum `zero_post' [aw = pondera] if  year == `y' 
			scalar zero_`y'=r(mean)
			// Share of hhs under extreme poverty. 
			
			tempvar epov_post
			gen `epov_post'=(`income_hh'-`base'<epovline) if !missing(`l_pred_inc_srv_shock')  
			sum `epov_post' [aw = pondera] if  year == `y' 
			scalar epov_`y'=r(mean)
			
			
            * Store in matrix
                        * Store in matrix
            loc ++j
            mat estt[1,`=`j'+1'] = totalU_`y'
			mat estt[1,`=`j'+2'] = totalU_`y'_l
			mat estt[1,`=`j'+3'] = totalU_`y'_u
			mat estt[1,`=`j'+4'] = hh_benefits
            mat estt[1,`=`j'+5'] = budget_nat
			mat estt[1,`=`j'+6'] = zero_`y'
			mat estt[1,`=`j'+7'] = epov_`y'

            if (`y' == 2019)		mat dat = estt
            else 				    mat dat = dat \ estt

            mat colnames dat = year covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u  hh_benefits budget_nat zero epov 

            // detail prediction
            *mkmat id_hogar_SISBEN year `_sample_srv' `l_pred_inc_srv_shock' l_pp_inc_srv `pp_benefits_c' `covered0' `income_hh', mat(pred_det_`y')
            *mat colnames pred_det_`y' = id_hogar_SISBEN year smpl l_pred l_pp_inc_srv pp_benefits covered0 income_hh


        }

        *----------  1.2. store in a dataset
        frame change estts  
        drop _all
        svmat dat, names(col)
        gen n_rep = `x'
        if `x' > 1 {
            append using `estt'
        } 
        
        tempfile estt
        save `estt', replace 
        
        frame change default
        restore 

        loc t_reps = `x'
        *noi di as result "Rep `x' of `reps' done!"

        noi _dots `x' 0 

    } // end of reps loop

    frame create results 
    frame change results 

    use `estt', clear 

    mkmat _all , matrix(preds)

    foreach v in covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov {
        forv y = 2019/2021 {
            sum `v' if year == `y', det
            scalar lbci_`v'_`y' = r(p5)
            scalar ubci_`v'_`y' = r(p95) 
        }
        
    }

	collapse (mean)  covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov  , by(year)
    

    foreach v in covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov {
        
        gen `v'_uci = .
        gen `v'_lci = .
        
        forv y = 2019/2021 {
            replace `v'_uci = ubci_`v'_`y' if year == `y'
            replace `v'_lci = lbci_`v'_`y' if year == `y'
        }
        
    }

    mkmat _all , matrix(estts)

    frame change default
    frame drop estts 
    
    // poverty prediction
    return matrix preds = preds
    return matrix estt = estts
    return scalar n_reps = `t_reps'
    *return matrix pred_det = pred_det_2019 

  } // end quietly 

    noi di as result "Results stored in matrix r(estt)"
    
end 

